Load libraries

library('readr')
library('dplyr')
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidylog")
## 
## Attaching package: 'tidylog'
## The following objects are masked from 'package:dplyr':
## 
##     add_count, add_tally, anti_join, count, distinct,
##     distinct_all, distinct_at, distinct_if, filter, filter_all,
##     filter_at, filter_if, full_join, group_by, group_by_all,
##     group_by_at, group_by_if, inner_join, left_join, mutate,
##     mutate_all, mutate_at, mutate_if, right_join, select,
##     select_all, select_at, select_if, semi_join, summarise,
##     summarise_all, summarise_at, summarise_if, summarize,
##     summarize_all, summarize_at, summarize_if, tally, top_n,
##     transmute, transmute_all, transmute_at, transmute_if
## The following object is masked from 'package:stats':
## 
##     filter
library("forcats")
library("lubridate")
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library("anytime")
library("rpivotTable")
library("DT")

Load data

## data downloaded from here: 
## https://bouldercolorado.gov/open-data/police-stop-demographics/

raw_results <- read_csv("police_stop_data_results_2018.csv")
## Parsed with column specification:
## cols(
##   appkey = col_character(),
##   appid = col_double(),
##   itemcode = col_character(),
##   itemdesc = col_character(),
##   addtime = col_character()
## )
raw_main    <- read_csv("police_stop_data_main_2018.csv")
## Parsed with column specification:
## cols(
##   stopdate = col_character(),
##   stoptime = col_double(),
##   streetnbr = col_character(),
##   streetdir = col_logical(),
##   street = col_character(),
##   Min = col_double(),
##   sex = col_character(),
##   race = col_character(),
##   ethnic = col_character(),
##   `Year of birth` = col_double(),
##   enfaction = col_character(),
##   rpmainid = col_double()
## )
police_stop_data_dictionary <- read_csv("police_stop_data_dictionary.csv")
## Parsed with column specification:
## cols(
##   Dataset = col_character(),
##   `Column Name` = col_character(),
##   Description = col_character()
## )

View Raw Results table

datatable(raw_results)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

View Raw Main table

datatable(raw_main)

Format data

raw_joined <- raw_results %>% 
  full_join(raw_main, by = c("appid" = "rpmainid"))
## full_join: added 8 rows and added 11 columns (stopdate, stoptime, streetnbr, streetdir, street, …)
format_joined <- raw_joined %>% 
  rename(birth_year            = `Year of birth`, 
         stop_duration_minutes = Min, 
         boulder_resident      = enfaction, 
         # stop_date             = stopdate, 
         stop_ID               = appid, 
         item_code             = itemcode, 
         item_description      = itemdesc,
         activity_type         = appkey) %>% 
  mutate(item_code        = as_factor(item_code), 
         item_description = as_factor(item_description), 
         activity_type  = factor(activity_type, 
                                 levels = c("RPT1", "RPT2", "RPT3", 
                                            "RPT4", "RPT5", "RPT6", 
                                            "RPT7", "1"), 
                                 labels = c("Type of Stop",
                                            "Stop Reason", 
                                            "Search Conducted", 
                                            "Search Authority", 
                                            "Contraband Found", 
                                            "Result of Stop", 
                                            "Type Code Not Disclosed", 
                                            "Data error")),
         activity_date_time = dmy_hm(addtime), 
         stop_date          = dmy(stopdate),
         stop_date_time     = dmy_hm(paste(stop_date, stoptime)), 
         sex                = as_factor(sex), 
         race               = factor(race, 
                                     levels = c("A", "B", "I", "U", "W"), 
                                     labels = c("Asian", 
                                                "Black or African American", 
                                                "American Indian or Alaskan Native", 
                                                "Unknown", 
                                                "White")), 
         ethnic        = factor(ethnic, 
                                levels = c("H", "N"), 
                                labels = c("Hispanic", "Non-Hispanic")), 
         boulder_resident = as_factor(boulder_resident)) %>% 
  # streetdir not currently used, per data dict
  select(-streetdir) 
## Warning: 23997 failed to parse.
## Warning: 24108 failed to parse.
## Warning: All formats failed to parse. No formats found.
## mutate: converted 'activity_type' from character to factor (0 new NA)
## mutate: converted 'item_code' from character to factor (0 new NA)
## mutate: converted 'item_description' from character to factor (0 new NA)
## mutate: converted 'sex' from character to factor (0 new NA)
## mutate: converted 'race' from character to factor (0 new NA)
## mutate: converted 'ethnic' from character to factor (0 new NA)
## mutate: converted 'boulder_resident' from character to factor (0 new NA)
## mutate: new variable 'activity_date_time' with 4120 unique values and 61% NA
## mutate: new variable 'stop_date' with 146 unique values and 61% NA
## mutate: new variable 'stop_date_time' with one unique value and 100% NA
## select: dropped one variable (streetdir)
glimpse(format_joined)
## Observations: 39,577
## Variables: 18
## $ activity_type         <fct> Type of Stop, Type of Stop, Stop Reason, S…
## $ stop_ID               <dbl> 1, 2862, 2862, 2862, 2862, 2862, 2863, 286…
## $ item_code             <fct> VEH, VEH, SPEE, *NO, WAR, NO, VEH, RECK, *…
## $ item_description      <fct> VEHICLE, VEHICLE, TRAFFIC-SPEEDING, *NO SE…
## $ addtime               <chr> "9/26/2018 15:09", "1/1/2018 16:26", "1/1/…
## $ stopdate              <chr> NA, "01/01/2018", "01/01/2018", "01/01/201…
## $ stoptime              <dbl> NA, 1619, 1619, 1619, 1619, 1619, 1630, 16…
## $ streetnbr             <chr> NA, "4561", "4561", "4561", "4561", "4561"…
## $ street                <chr> NA, "ARAPAHOE AVE/MACARTHUR DR", "ARAPAHOE…
## $ stop_duration_minutes <dbl> NA, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 35,…
## $ sex                   <fct> NA, M, M, M, M, M, M, M, M, M, M, M, M, M,…
## $ race                  <fct> NA, White, White, White, White, White, Whi…
## $ ethnic                <fct> NA, Non-Hispanic, Non-Hispanic, Non-Hispan…
## $ birth_year            <dbl> NA, 1976, 1976, 1976, 1976, 1976, 2000, 20…
## $ boulder_resident      <fct> NA, Y, Y, Y, Y, Y, N, N, N, N, N, N, N, N,…
## $ activity_date_time    <dttm> NA, 2018-01-01 16:26:00, 2018-01-01 16:26…
## $ stop_date             <date> NA, 2018-01-01, 2018-01-01, 2018-01-01, 2…
## $ stop_date_time        <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Create pivot table 

sub_tbl_for_pivot <- format_joined %>% 
  # remove cols not needed for the pivot table 
  select(
    # can't get date time working right now, so excluding
    # -addtime, 
    # -stopdate,
    -stop_date, 
    -stoptime, 
    -activity_date_time,
    -stop_date_time
  ) %>% 
  # rearrange cols
  select(stop_ID, 
         stopdate, 
         stop_duration_minutes,
         streetnbr, 
         street,
         sex, 
         race, 
         ethnic, 
         birth_year, 
         boulder_resident, 
         activity_type, 
         addtime,
         item_code, 
         item_description) %>% 
  arrange(stopdate, stop_ID)
## select: dropped 4 variables (stoptime, activity_date_time, stop_date, stop_date_time)

View data

datatable(sub_tbl_for_pivot)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Table of stop types by race

rpivotTable(sub_tbl_for_pivot, 
            rows = c("activity_type", 
                     "item_description"), 
            cols = "race", 
            aggregatorName = "Count Unique Values", 
            vals = "stop_ID", 
            rendererName = "Table")